Code for DATASET 1

Generate a null model of a stratigraphic column containing a defined number of occupation layers, each containing n artifacts randomly distributed in x-y space as drawn from a uniform distribution

Function that creates a layer simulating a single occupation assuming artifacts are entirely randomly spatially distributed:

randlayer <- function(i) {
  data.frame(x=runif(20, min=0, max=200), y=runif(20, min=0, max=200), id=i)
}

Loops 100 times simulating 100 occupation instances with randomly distributed artifacts. Generates a list:

rand_time_average <- lapply(1:100, FUN=randlayer)

Binds all the layers into a single data frame from the list, simulating a strat column with randomly distributed artifacts over 100 occupation instances:

rand_stratcol <- do.call(rbind, args=rand_time_average)

Creates a scatterplot of all of the points in the strat column in the same x-y plane:

library(ggplot2)
qplot(data=rand_stratcol, x=rand_stratcol$x, y=rand_stratcol$y, main="Randomized Stratigraphic Column", xlab="X Distance (cm)", ylab="Y Distance (cm)") + xlim(0,200) + ylim(0,200)

Subsets the dataframe by a set of layers:

grouped_rand_stratcol <- subset(rand_stratcol, id > 1 & id < 5)

Creates a scatterplot of the subsetted layers dataframe:

qplot(data=grouped_rand_stratcol, x=grouped_rand_stratcol$x, y=grouped_rand_stratcol$y, main="Randomized Stratigraphic Column", xlab="X Distance (cm)", ylab="Y Distance (cm)") + xlim(0,200) + ylim(0,200)

Code for DATASET 2

Generate a model of a stratigraphic column containing a defined number of occupation layers, each containing n artifacts distributed in x-y space as drawn from a normal distribution with a fixed mean.

Function to generate a sample from a truncated normal distribution:

mytruncnorm <- function(n,a=0,b=200,mean,sd,bign=1000) {
  if(bign < n) stop("n is less than total number of values generated")
  bignvect <- rnorm(bign,mean=mean,sd=sd)
  nvect <- subset(bignvect, bignvect > a & bignvect < b)
  sample(nvect, n, replace=TRUE)
}

Function that creates a layer simulating a single occupation assuming there is an area of greater likelihood of finding artifacts assuming a fixed mean:

ideallocuslayer <- function(i) {
  data.frame(x=mytruncnorm(20, a=0, b=200, mean=150, sd=20), y=(mytruncnorm(20, a=0, b=200, mean=100, sd=20)), id=i)
}

Loops 100 times simulating 100 occupation instances with artifacts normally distributed about a mean that is fixed:

idealnorm_time_average <- lapply(1:100, FUN=ideallocuslayer)

Binds all the layers into a single data frame from the list, simulating a strat column that is normally distributed about a mean that is fixed over 100 occupation instances:

idealnorm_stratcol <- do.call(rbind, args=idealnorm_time_average)

Creates a scatterplot of all of the points in the strat column in the same x-y plane:

library(ggplot2)
qplot(data=idealnorm_stratcol, x=idealnorm_stratcol$x, y=idealnorm_stratcol$y, main="Fixed Locus Stratigraphic Column", xlab="X Distance (cm)", ylab="Y Distance (cm)") + xlim(0,200) + ylim(0,200)

Subsets the dataframe by a set of layers:

grouped_idealnorm_stratcol <- subset(idealnorm_stratcol, id > 1 & id < 5)

Creates a scatterplot of the subsetted layers dataframe:

qplot(data=grouped_idealnorm_stratcol, x=grouped_idealnorm_stratcol$x, y=grouped_idealnorm_stratcol$y, main="Fixed Locus Stratigraphic Column", xlab="X Distance (cm)", ylab="Y Distance (cm)") + xlim(0,200) + ylim(0,200)

Code for DATASET 3

Generate a model of a stratigraphic column containing a defined number of occupation layers, each containing n artifacts distributed in x-y space as drawn from a normal distribution in which the mean values change slightly in each layer. This is meant to simulate successive occupation in which a feature that draws an increased likelihood of artifact deposition (i.e. a hearth) is not in the exact same location in each occupation.

Function to generate a sample from a truncated normal distribution:

mytruncnorm <- function(n,a=0,b=200,mean,sd,bign=1000) {
  if(bign < n) stop("n is less than total number of values generated")
  bignvect <- rnorm(bign,mean=mean,sd=sd)
  nvect <- subset(bignvect, bignvect > a & bignvect < b)
  sample(nvect, n, replace=TRUE)
}

Function to generate a random x value in a certain range for the mean of the normal distribution:

varymeanx <- function() {
  sample(145:155, 1, replace=TRUE)
}

Function to generate a random y value in a certain range for the mean of the normal distribution:

varymeany <- function() {
  sample(95:105, 1, replace=TRUE)
}

Function that creates a layer simulating a single occupation assuming there is an area of greater likelihood of finding artifacts:

locuslayer <- function(i) {
  data.frame(x=mytruncnorm(20, a=0, b=200, mean=varymeanx(), sd=20), y=(mytruncnorm(20, a=0, b=200, mean=varymeany(), sd=20)), id=i)
}

Loops 100 times simulating 100 occupation instances with artifacts normally distributed about a mean that varies within a range:

norm_time_average <- lapply(1:100, FUN=locuslayer)

Binds all the layers into a single data frame from the list, simulating a strat column that is normally distributed about a mean that varies within a range over 100 occupation instances:

norm_stratcol <- do.call(rbind, args=norm_time_average)

Creates a scatterplot of all of the points in the strat column in the same x-y plane:

library(ggplot2)
qplot(data=norm_stratcol, x=norm_stratcol$x, y=norm_stratcol$y, main="Variable Locus Stratigraphic Column", xlab="X Distance (cm)", ylab="Y Distance (cm)") + xlim(0,200) + ylim(0,200)

Subsets the dataframe by a set of layers:

grouped_norm_stratcol <- subset(norm_stratcol, id > 1 & id < 5)

Creates a scatterplot of the subsetted layers dataframe:

qplot(data=grouped_norm_stratcol, x=grouped_norm_stratcol$x, y=grouped_norm_stratcol$y, main="Variable Locus Stratigraphic Column", xlab="X Distance (cm)", ylab="Y Distance (cm)") + xlim(0,200) + ylim(0,200)

Code for DATASET 4

Import a real dataset with artifact coordinates in a site acquired from Dave Braun from Koobi Fora, Kenya.

Import the datasets:

FwJj20_finds <- read.csv("~/Desktop/FwJj20_finds.csv")
FXJJ20AB <- read.csv("~/Desktop/FXJJ20AB_2013_july_13_points.csv")

Subset/transform into usable data frames:

###For FWJJ20
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
FwJj20_df <- select(FwJj20_finds,N:Type)
colnames(FwJj20_df)[1] <- 'y'
colnames(FwJj20_df)[2] <- 'x'
colnames(FwJj20_df)[3] <- 'Depth'
FwJj20_df <- FwJj20_df[,c(2,1,3,4)]
FwJj20_df_sub <- subset(FwJj20_df, x>=982.5 & y>=936)

###For FXJJ20AB
FXJJ20AB_df <- select(FXJJ20AB,LOT,DESCRIPTION,X,Y,Z)
colnames(FXJJ20AB_df)[1] <- "Lot"
colnames(FXJJ20AB_df)[2] <- "Type"
colnames(FXJJ20AB_df)[3] <- "x"
colnames(FXJJ20AB_df)[4] <- "y"
colnames(FXJJ20AB_df)[5] <- "Depth"
FXJJ20AB_df_sub <- subset(FXJJ20AB_df, x>=501 & y>=500)

Creates a scatterplot of all points in each subsetted strat column:

###For FWJJ20
library(ggplot2)
qplot(data=FwJj20_df_sub, x=FwJj20_df_sub$x, y=FwJj20_df_sub$y, main="FWJJ20 Subsetted Real Stratigraphic Column", xlab="X Coordinate", ylab="Y Coordinate") + xlim(982.5,988) + ylim(936,941)

###For FXJJ20AB
qplot(data=FXJJ20AB_df_sub, x=FXJJ20AB_df_sub$x, y=FXJJ20AB_df_sub$y, main="FXJJ20AB Subsetted Real Stratigraphic Column", xlab="X Coordinate", ylab="Y Coordinate") + xlim(501,505) + ylim(500,505)
## Warning: Removed 9 rows containing missing values (geom_point).

Code for Spatial Statistical Analysis

Formatting data for spatstat package (creating a point pattern object)

library(spatstat)
## Loading required package: nlme
## 
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
## 
##     collapse
## Loading required package: rpart
## 
## spatstat 1.47-0       (nickname: 'Responsible Gambler') 
## For an introduction to spatstat, type 'beginner'

For DATASET 1

rand_stratcol_pattern <- ppp(rand_stratcol[,1], rand_stratcol[,2], c(0,200), c(0,200))

For DATASET 1 (grouped by layers)

grouped_rand_stratcol_pattern <- ppp(grouped_rand_stratcol[,1], grouped_rand_stratcol[,2], c(0,200), c(0,200))

For DATASET 2

idealnorm_stratcol_pattern <- ppp(idealnorm_stratcol[,1], idealnorm_stratcol[,2], c(0,200), c(0,200))

For DATASET 2 (grouped by layers)

grouped_idealnorm_stratcol_pattern <- ppp(grouped_idealnorm_stratcol[,1], grouped_idealnorm_stratcol[,2], c(0,200), c(0,200))

For DATASET 3

norm_stratcol_pattern <- ppp(norm_stratcol[,1], norm_stratcol[,2], c(0,200), c(0,200))

For DATASET 3 (grouped by layers)

grouped_norm_stratcol_pattern <- ppp(grouped_norm_stratcol[,1], grouped_norm_stratcol[,2], c(0,200), c(0,200))

For DATASET 4(FWJJ20)

FwJj20_df_sub_pattern <- ppp(FwJj20_df_sub[,1], FwJj20_df_sub[,2], c(982.5,988), c(936,941))
## Warning in ppp(FwJj20_df_sub[, 1], FwJj20_df_sub[, 2], c(982.5, 988),
## c(936, : data contain duplicated points

For DATASET 4(FXJJ20AB)

FXJJ20AB_df_sub_pattern <- ppp(FXJJ20AB_df_sub[,3], FXJJ20AB_df_sub[,4], c(501,505), c(500,519))

Ripley’s K-function Plots

Generates K-function plots for each point pattern object

For DATASET 1

Kestplot_1 <- plot(Kest(rand_stratcol_pattern), main="K-function Estimate for DATASET 1 (random strat column)")

#OR

Kest_envplot_1 <- plot(envelope(rand_stratcol_pattern, Kest, nsim = 99, correction = "trans"), main="K-function Estimate for DATASET 1 (random strat column)")
## Generating 99 simulations of CSR  ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
## 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
## 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98,  99.
## 
## Done.

For DATASET 1 (grouped by layers)

Kestplot_1a <- plot(Kest(grouped_rand_stratcol_pattern), main="K-function Estimate for DATASET 1 (subset of random strat column)")

#OR

Kest_envplot_1a <- plot(envelope(grouped_rand_stratcol_pattern, Kest, nsim = 99, correction = "trans"), main="K-function Estimate for DATASET 1 (subset of random strat column)")
## Generating 99 simulations of CSR  ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
## 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
## 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98,  99.
## 
## Done.

For DATASET 2

Kestplot_2 <- plot(Kest(idealnorm_stratcol_pattern), main="K-function Estimate for DATASET 2 (ideal norm strat column)")

#OR

Kest_envplot_2 <- plot(envelope(idealnorm_stratcol_pattern, Kest, nsim = 99, correction = "trans"), main="K-function Estimate for DATASET 2 (ideal norm strat column)")
## Generating 99 simulations of CSR  ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
## 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
## 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98,  99.
## 
## Done.

For DATASET 2 (grouped by layers)

Kestplot_2a <- plot(Kest(grouped_idealnorm_stratcol_pattern), main="K-function Estimate for DATASET 2 (grouped ideal norm strat column)")

#OR

Kest_envplot_2a <- plot(envelope(grouped_idealnorm_stratcol_pattern, Kest, nsim = 99, correction = "trans"), main="K-function Estimate for DATASET 2 (grouped ideal norm strat column)")
## Generating 99 simulations of CSR  ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
## 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
## 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98,  99.
## 
## Done.

For DATASET 3

Kestplot_3 <- plot(Kest(norm_stratcol_pattern), main="K-function Estimate for DATASET 3 (variable norm strat column)")

#OR

Kest_envplot_3 <- plot(envelope(norm_stratcol_pattern, Kest, nsim = 99, correction = "trans"), main="K-function Estimate for DATASET 3 (variable norm strat column)")
## Generating 99 simulations of CSR  ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
## 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
## 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98,  99.
## 
## Done.

For DATASET 3 (grouped by layers)

Kestplot_3a <- plot(Kest(grouped_norm_stratcol_pattern), main="K-function Estimate for DATASET 3 (grouped variable norm strat column)")

#OR

Kest_envplot_3a <- plot(envelope(grouped_norm_stratcol_pattern, Kest, nsim = 99, correction = "trans"), main="K-function Estimate for DATASET 3 (grouped variable norm strat column)")
## Generating 99 simulations of CSR  ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
## 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
## 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98,  99.
## 
## Done.

For DATASET 4(FWJJ20)

Kestplot_4_FWJJ20sub <- plot(Kest(FwJj20_df_sub_pattern), main="K-function Estimate for DATASET 4 (subset of FWJJ20)")

#OR

Kest_envplot_4_FWJJ20sub <- plot(envelope(FwJj20_df_sub_pattern, Kest, nsim = 99, correction = "trans"), main="K-function Estimate for DATASET 4 (subset of FWJJ20)")
## Generating 99 simulations of CSR  ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
## 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
## 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98,  99.
## 
## Done.

For DATASET 4(FXJJ20AB)

Kestplot_4_FXJJ20ABsub <- plot(Kest(FXJJ20AB_df_sub_pattern), main="K-function Estimate for DATASET 4 (subset of FXJJ20AB)")

#OR

Kest_envplot_4_FXJJ20ABsub <- plot(envelope(FXJJ20AB_df_sub_pattern, Kest, nsim = 99, correction = "trans"), main="K-function Estimate for DATASET 4 (subset of FXJJ20AB)")
## Generating 99 simulations of CSR  ...
## 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38,
## 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76,
## 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98,  99.
## 
## Done.

Visualization of Results

DATASET 1

False Positive Rate

Rand_StratcolFPRate <- read.csv("~/Desktop/Rand_StratcolFPRate.csv")
ggplot(data=Rand_StratcolFPRate, aes(x=Layers.Compressed, y=False.Positive.Rate)) + geom_point() + labs(title="Randomized Strat Column False Positive Rate")

DATASETS 2 AND 3

Plot 1

FeatureVariability <- read.csv("~/Desktop/ForR_Final.csv")
ggplot(data=FeatureVariability, aes(x=FeatureVariability[,1], y=FeatureVariability[,2])) + geom_point() + labs(title="Coordinate Range Variation as a Function of Feature Variability Area", x="Feature Variability Area as Proportion of Total Area", y="Average Coordinate Range Variation Over All Layer Grouping Tests")

Plot 2

FeatureVariability <- read.csv("~/Desktop/ForR_Final.csv")
ggplot(data=FeatureVariability, aes(x=FeatureVariability[,1], y=FeatureVariability[,3])) + geom_point() + labs(title="Full Strat Column Coordinate Range Variation as a Function of Feature Variability Area", x="Feature Variability Area as Proportion of Total Area", y="Coordinate Range Variation Over Full Strat Column")